from Transfer import *
import time
from collections import OrderedDict
from features_extraction import *
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
We load the VGG19 model and set GPU mode. Then we generate default parameters for the transfer.
# Build the VGG19 network and its input preprocessor (from features_extraction).
net, transformer = build_net()
# Run all subsequent solver work on the GPU.
caffe.set_mode_gpu()
# NOTE(review): this call shadows the imported `args` factory with the
# parameter object it returns; every later cell relies on that rebinding.
args = args(net, transformer)
# Print the current transfer parameters for inspection.
args.infos()
See which images are available.
# Shell escape: list the image files available as content/style inputs.
!ls ./images/
See which GPU and CPU are used.
# Shell escapes: report the GPU model and the CPU model of this machine.
!nvidia-smi -L
!lscpu | grep Model
See the blobs in the network which we can use
# Walk the network's blobs from the top of the net downwards; any of these
# names can be used as a content or style layer in the transfer settings.
for layer in reversed(net.blobs.keys()):
    # print() works under both Python 2 and Python 3 for a single argument,
    # unlike the original Python-2-only `print layer` statement.
    print(layer)
# Tuebingen photo restyled with "The Shipwreck of the Minotaur":
# start from white noise and weight the five style layers equally.
args.change_content('tubingen')
args.change_style('shipwreck')
args.start = 'random'
args.content_layer = 'conv4_2'
args.style_scale = 1.2
args.ratio = 1e-3
args.lengths = 512
args.optimization['maxiter'] = 1000
# Uniform weight of 1/5 on each style layer.
style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
args.style_weights = OrderedDict((name, 1./5) for name in style_layers)
output = transfer(*args.get())
print_image([args.content, args.style, output])
save(output, args)
# Same content with Van Gogh's "Starry Night": smaller ratio, fewer
# iterations, still starting from noise.
args.change_content('tubingen')
args.change_style('starry_night')
args.start = 'random'
args.content_layer = 'conv4_2'
args.style_scale = 1.
args.ratio = 1e-4
args.lengths = 512
args.optimization['maxiter'] = 500
# Spread the style weight uniformly over conv1_1 .. conv5_1.
layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
args.style_weights = OrderedDict(zip(layers, [1./5] * len(layers)))
output = transfer(*args.get())
print_image([args.content, args.style, output])
save(output, args)
# Same content with Munch's "The Scream"; ratio back to 1e-3 and the
# full 1000 iterations.
args.change_content('tubingen')
args.change_style('the_scream')
args.start = 'random'
args.content_layer = 'conv4_2'
args.style_scale = 1.
args.ratio = 1e-3
args.optimization['maxiter'] = 1000
# fromkeys preserves insertion order and assigns 1/5 to every layer.
args.style_weights = OrderedDict.fromkeys(
    ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'], 1./5)
output = transfer(*args.get())
print_image([args.content, args.style, output])
save(output, args)
# Keep the current content image and switch the style to "Seated Nude";
# the style-layer weights carry over from the previous run.
args.change_style('seated_nude')
args.start = 'random'
args.style_scale = 1
args.ratio = 1e-4
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])
save(output, args)
# One more style on the same content: Kandinsky's "Composition VII".
args.change_style('composition_VII')
args.style_scale = 1.
args.ratio = 1e-4
args.optimization['maxiter'] = 1000
inputs = args.get()
output = transfer(*inputs)
print_image([args.content, args.style, output])
save(output, args)
We can now test on our own images. Here is a transfer on a friend's picture; this time we initialize the input with the content image directly in order to decrease the number of iterations.
# Friend's portrait with a texture style; seeding the optimization with
# the content image (instead of noise) lets us halve the iterations.
args.change_style('texture')
args.change_content('charles')
args.start = 'content'
args.style_scale = 1.2
args.ratio = 1e-5
args.optimization['maxiter'] = 500
output = transfer(*args.get())
print_image([args.content, args.style, output])
You can see that the transfer takes the shadow in the picture into account, and the generated structure follows its lines. Now we suppress this effect by mixing a bit of the style image into the content image used as the input.
# Same pair, but seed the input with a mix of content and style image,
# and match content at a higher layer (conv5_1) with a larger ratio.
args.change_style('texture')
args.change_content('charles')
args.start = 'mixed'
args.ratio = 1e-2
args.style_scale = 1.2
args.content_layer = 'conv5_1'
# Equal 1/5 weight on each of the five style layers.
args.style_weights = OrderedDict.fromkeys(
    ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'], 1./5)
args.optimization['maxiter'] = 500
output = transfer(*args.get())
print_image([args.content, args.style, output])
In order to see the influence of the different parameters, we perform several transfers, tuning the ratio and the number of layers used to represent the style, picked in $\{\textbf{conv1}_1, \textbf{conv2}_1, \textbf{conv3}_1, \textbf{conv4}_1, \textbf{conv5}_1\}$.
# Grid experiment: vary the number of style layers (w1..w5) against the
# content/style ratio on the same image pair, at a small 224px size and
# few iterations so all 20 runs stay cheap.
args.change_style('composition_VII')
args.change_content('tubingen')
args.style_scale = 1.2
args.lengths = 224
args.start = 'random'
args.optimization['maxiter'] = 100
args.content_layer = 'conv4_2'
# wK spreads a total weight of 1 uniformly over the first K style layers.
w1 = OrderedDict([('conv1_1', 1.)])
w2 = OrderedDict([('conv1_1', 1./2), ('conv2_1', 1./2)])
w3 = OrderedDict([('conv1_1', 1./3), ('conv2_1', 1./3), ('conv3_1', 1./3)])
w4 = OrderedDict([('conv1_1', 1./4), ('conv2_1', 1./4), ('conv3_1', 1./4), ('conv4_1', 1./4)])
w5 = OrderedDict([('conv1_1', 1./5), ('conv2_1', 1./5), ('conv3_1', 1./5), ('conv4_1', 1./5), ('conv5_1', 1./5)])
lr = [1e-5, 1e-4, 1e-3, 1e-2]
ws = [w1, w2, w3, w4, w5]
outputs = []
# NOTE(review): loop indentation was lost in the notebook export; it is
# reconstructed here so that 5*4 = 20 outputs match the (5, 4) grid below.
for k in range(5):
    # The weight set is invariant over the inner loop: set it once per row.
    args.style_weights = ws[k]
    for p in range(4):
        args.ratio = lr[p]
        outputs.append(transfer(*args.get()))
# Display the 20 results as a 5x4 grid (rows: #style layers, cols: ratio).
print_image(outputs, (5, 4))
The higher the layers used, the more we can generate a correct structure (with a good scale and propagation). When we increase the ratio, we manage to focus the style generation on the content.
Now, what happens when we try to do the same transfer as in the paper [3]? In that work, the authors filter a content image with a small structure image.
# Texture-filter experiment from [3]: a small rug texture, kept at its
# native 127px scale, applied to a 300px content image.
args.change_style('rug')
args.change_content('girl')
args.start = 'random'
args.ratio = 1e-4
args.style_scale = 127./300
args.lengths = 300
args.content_layer = 'conv4_2'
# Equal weight on every style layer.
weight_per_layer = 1./5
args.style_weights = OrderedDict(
    (layer, weight_per_layer)
    for layer in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'))
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])
Even with only 1000 iterations and a very small ratio we can't propagate the structure of the rug through the whole image. We now increase the size of the style image so that the content and the style have the same size.
# Same rug texture, now at a 224px working size so the style image and
# the content image end up the same size; larger ratio as well.
args.change_style('rug')
args.change_content('girl')
args.start = 'random'
args.ratio = 1e-3
args.style_scale = 127./224
args.lengths = 224
args.content_layer = 'conv4_2'
args.style_weights = OrderedDict.fromkeys(
    ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'], 1./5)
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])
As we could expect, the local structure is bigger, but we still cannot propagate it through the whole image. We have to use a style image with much more structure. For that, we use a bigger image with a similar structure and scale it to the same size as the content image.
# Use a larger rug image with much more repeated structure, scaled to
# the content size, so the texture can propagate over the whole output.
args.change_style('rug_tall')
args.change_content('girl')
args.start = 'random'
args.ratio = 1e-5
args.style_scale = 1.
args.lengths = 300
args.content_layer = 'conv4_2'
# Equal 1/5 weight on each of the five style layers.
names = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
args.style_weights = OrderedDict((n, 1./5) for n in names)
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])
save(output, args)
Here the result is what we expected.
Let's see what happens when we extend the structure of the first style image
# Stitch a 3x3 tiling of the small rug texture so the style image holds
# enough repeated pattern to cover the whole content image.
structure = load('rug')
# np.tile replaces the three per-channel numpy.matlib.repmat calls: plain
# `import numpy as np` does not expose np.matlib (it needs its own explicit
# import), and numpy.matlib is deprecated. Casting to float matches the
# original float64 np.zeros buffer the channels were copied into.
style = np.tile(structure, (3, 3, 1)).astype(float)
imsave('images/rug_extended.jpg', style)
# Run the transfer against the stitched 3x3 rug texture, seeding the
# optimization with the content image this time.
args.change_style('rug_extended')
args.change_content('girl')
args.start = 'content'
args.ratio = 1e-4
args.style_scale = 1.
args.lengths = 300
args.content_layer = 'conv4_2'
args.style_weights = OrderedDict.fromkeys(
    ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'], 1./5)
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])
We can conclude that small images do not carry enough information about the texture (not enough repeated pattern). Here we had to stitch copies of the same small image into a bigger one in order to fully propagate the style over the content image.
# Control run: the tall rug style again, starting from noise, with the
# same 1e-4 ratio used for the stitched texture.
args.change_style('rug_tall')
args.change_content('girl')
args.start = 'random'
args.ratio = 1e-4
args.style_scale = 1.
args.lengths = 300
args.content_layer = 'conv4_2'
# Equal weight on the five style layers.
layer_list = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
args.style_weights = OrderedDict(zip(layer_list, [1./5] * 5))
args.optimization['maxiter'] = 1000
output = transfer(*args.get())
print_image([args.content, args.style, output])